# Project: Serbia Survey
# Authors: William O'Brochta and Patrick Cunha Silva
# Goal: Main Analysis
# First Version: April 9, 2021
# This Version: May 10, 2023
# Last Updaded by: Patrick

library(lmtest)
library(car)
library(sandwich)
library(here)

rm(list = ls())

# Load Data
load(here('Data_Replication', 'genderedIdeologies.RData'))

# Correlation between statements
round(cor(mydf[, c('Y', 'better_world', 'most_places', 'influence')]), 2)

# Create a few variables
mydf$non_nat_man <- as.numeric(mydf$milena == 0 & mydf$slogan_cyrillic == 0)
mydf$nat_man <- as.numeric(mydf$milena == 0 & mydf$slogan_cyrillic == 1)
mydf$non_nat_woman <- as.numeric(mydf$milena == 1 & mydf$slogan_cyrillic == 0)
mydf$nat_woman <- as.numeric(mydf$milena == 1 & mydf$slogan_cyrillic == 1)

#### Run Main Model
m1 <- lm(Y ~ slogan_cyrillic * milena + cyrillic_second + cyrillic_start, data = mydf) # No Controls
se1 <- sqrt(diag(vcovHC(m1, 'HC2')))

### Is B1 + B2 + B3 (woman + cyrillic) > zero (man + latin)? 
linearHypothesis(m1, vcov. = vcovHC(m1, 'HC2'), 'slogan_cyrillic + milena + slogan_cyrillic:milena = 0') # p = 0.0165
coef(m1)['slogan_cyrillic'] + coef(m1)['milena'] + coef(m1)['slogan_cyrillic:milena']
### Is B1 + B2 + B3 (woman + cyrillic) <  B1 (man + cyrillic)?  
linearHypothesis(m1, vcov. = vcovHC(m1, 'HC2'), 'milena + slogan_cyrillic:milena + slogan_cyrillic = slogan_cyrillic') # p = 0.07686
-1 * (coef(m1)['milena'] + coef(m1)['slogan_cyrillic:milena'])
### Is  B1 + B2 + B3 (woman + cyrillic) > B2 (woman + latin)?  
linearHypothesis(m1, vcov. = vcovHC(m1, 'HC2'), "milena + slogan_cyrillic:milena + slogan_cyrillic = milena") # p = 0.0008
coef(m1)['slogan_cyrillic:milena'] + coef(m1)['slogan_cyrillic']

###Note: these tests can be directly recovered from the following model:
###We need to multiply the coefficients for non_nat_man and non_nat_woman by -1 to get the correct direction 
###It is necessary because of the way we formulate our tests in the paper.
m1_alt <- lm(Y ~ nat_man + non_nat_man + non_nat_woman + cyrillic_second + cyrillic_start, data = mydf) # No Controls
se_alt1 <- sqrt(diag(vcovHC(m1_alt, 'HC2')))
coeftest(m1_alt, vcovHC(m1_alt, 'HC2'))

### Substantive interpretation for m1
coef(m1_alt)["non_nat_man"]/sd(mydf$Y)
coef(m1_alt)["non_nat_woman"]/sd(mydf$Y)
coef(m1_alt)["non_nat_man"]/mean(mydf$Y)
coef(m1_alt)["non_nat_woman"]/mean(mydf$Y)


#### Run Model for Men Respondents
m2 <- lm(Y ~ slogan_cyrillic * milena + cyrillic_second + cyrillic_start, data = subset(mydf, gender == 'male')) # No Controls
se2 <- sqrt(diag(vcovHC(m2, 'HC2')))

###Note: LH tests can be directly recovered from the following model (for men subsample):
m2_alt <- lm(Y ~ nat_man + non_nat_man + non_nat_woman + cyrillic_second + cyrillic_start, data = subset(mydf, gender == 'male')) # No Controls
se2_alt <- sqrt(diag(vcovHC(m2_alt, 'HC2')))
coeftest(m2_alt, vcovHC(m2_alt, 'HC2'))

#### Run Model for Women Respondents
m3 <- lm(Y ~ slogan_cyrillic * milena + cyrillic_second + cyrillic_start, data = subset(mydf, gender == 'female')) # No Controls
se3 <- sqrt(diag(vcovHC(m3, 'HC2')))

###Note: LH tests can be directly recovered from the following model (for women subsample):
m3_alt <- lm(Y ~ nat_man + non_nat_man + non_nat_woman + cyrillic_second + cyrillic_start, data = subset(mydf, gender == 'female')) # No Controls
se3_alt <- sqrt(diag(vcovHC(m3_alt, 'HC2')))
coeftest(m3_alt, vcovHC(m3_alt, 'HC2'))

######################################
##### Compare other coefficients #####
######################################

# M3
a1 <- linearHypothesis(m1_alt, "non_nat_woman = nat_man", vcov. = vcovHC(m1_alt, 'HC2'))
a2 <- linearHypothesis(m1_alt, "non_nat_woman = non_nat_man", vcov. = vcovHC(m1_alt, 'HC2'))
a3 <- linearHypothesis(m1_alt, "nat_man = non_nat_man", vcov. = vcovHC(m1_alt, 'HC2'))
# M3
r1 <- linearHypothesis(m2_alt, "non_nat_woman = nat_man", vcov. = vcovHC(m2_alt, 'HC2'))
r2 <- linearHypothesis(m2_alt, "non_nat_woman = non_nat_man", vcov. = vcovHC(m2_alt, 'HC2'))
r3 <- linearHypothesis(m2_alt, "nat_man = non_nat_man", vcov. = vcovHC(m2_alt, 'HC2'))
# M4
q1 <- linearHypothesis(m3_alt, "non_nat_woman = nat_man", vcov. = vcovHC(m3_alt, 'HC2'))
q2 <- linearHypothesis(m3_alt, "non_nat_woman = non_nat_man", vcov. = vcovHC(m3_alt, 'HC2'))
q3 <- linearHypothesis(m3_alt, "nat_man = non_nat_man", vcov. = vcovHC(m3_alt, 'HC2'))

##############################
###### Create Figure 1 #######
##############################

pdf(here("WomenPaper", "Plots", 'subsample_tests.pdf'), width = 7, height = 5)
## Create betas and CIs for men subsample
toplot <- matrix(NA, nrow = 6, ncol = 4)
betas <- c(coef(m2_alt)[2:4], attributes(r1)$value, attributes(r2)$value, attributes(r3)$value)
se <- c(sqrt(diag(vcovHC(m2_alt, 'HC2')))[2:4], sqrt(attributes(r1)$vcov), sqrt(attributes(r2)$vcov), sqrt(attributes(r3)$vcov))
toplot[, 1] <- qt(0.025, m2_alt$df) * se + betas
toplot[, 2] <- qt(0.05, m2_alt$df) * se + betas
toplot[, 3] <- qt(0.95, m2_alt$df) * se + betas
toplot[, 4] <- qt(0.975, m2_alt$df) * se + betas

# Generate plot
par(mar = c(5, 10, 3, 3))
plot(x = betas, 
     y = 1:6, 
     pch = 19,
     xlim = c(-0.6, 0.6),
     xlab = 'Difference in Perceived Nationalism',
     ylab = '',
     axes = FALSE
)
sapply(1:6, \(i) lines(y = c(i, i), x = c(toplot[i, 1], toplot[i, 4])))
abline(v = 0, lty = 2, lwd = 0.5)
axis(1)
axis(2, labels = c("Man using Cyrillic -\n Woman using Cyrillic", 
                   "Man using Latin -\n Woman using Cyrillic", 
                   "Woman using Latin -\n Woman using Cyrillic", 
                   "Woman using Latin -\n Man using Cyrillic", 
                   "Woman using Latin -\n Man using Latin", 
                   "Man using Cyrillic -\n Man using Latin"),
     at = 1:6,
     las = 2)

## Create betas and CIs for women subsample
toplot <- matrix(NA, nrow = 6, ncol = 4)
betas <- c(coef(m3_alt)[2:4], attributes(q1)$value, attributes(q2)$value, attributes(q3)$value)
se <- c(sqrt(diag(vcovHC(m3_alt, 'HC2')))[2:4], sqrt(attributes(q1)$vcov), sqrt(attributes(q2)$vcov), sqrt(attributes(q3)$vcov))
toplot[, 1] <- qt(0.025, m3_alt$df) * se + betas
toplot[, 2] <- qt(0.05, m3_alt$df) * se + betas
toplot[, 3] <- qt(0.95, m3_alt$df) * se + betas
toplot[, 4] <- qt(0.975, m3_alt$df) * se + betas

# Generate plot
par(mar = c(5, 10, 3, 3))
plot(x = betas, 
     y = 1:6, 
     pch = 19,
     xlim = c(-0.6, 0.6),
     xlab = 'Difference in Perceived Nationalism',
     ylab = '',
     axes = FALSE
)
sapply(1:6, \(i) lines(y = c(i, i), x = c(toplot[i, 1], toplot[i, 4])))
abline(v = 0, lty = 2, lwd = 0.5)
axis(1)
axis(2, labels = c("Man using Cyrillic -\n Woman using Cyrillic", 
                   "Man using Latin -\n Woman using Cyrillic", 
                   "Woman using Latin -\n Woman using Cyrillic", 
                   "Woman using Latin -\n Man using Cyrillic", 
                   "Woman using Latin -\n Man using Latin", 
                   "Man using Cyrillic -\n Man using Latin"),
     at = 1:6,
     las = 2)
dev.off()


#####################################
##### Create Table for Appendix #####
#####################################

stargazer::stargazer(m1, m2, m3, 
                     se = list(se1, se2, se3),
                     covariate.labels = c('Slogan Alphabet = Cyrillic',
                                          "Politician's Gender = Woman",
                                          "Slogan Alphabet = Cyrillic x Politician's Gender = Woman",
                                          "Survey Alphabet = Cyrillic",
                                          'Survey Ad = Cyrillic'),
                     dep.var.labels = 'Perceived Nationalism',
                     column.labels = c('Full Sample', 'Men Respondents', 'Women Respondents'),
                     single.row = TRUE,
                     no.space = TRUE ,
                     omit.stat = c("ser", 'f'),                     
                     style = 'apsr',
                     type = 'text',
                     star.cutoffs = c(0.05, 0.01, 0.001),
                     title = 'Gender and Script on Citizens’ Perception of Politicians Regarding Nationalism')

